---
html_document: default
author: "Jane Acierno, Clare Kennedy, Fiery Cushman, and Jonathan Phillips"
date: "July 2025"
output: html_document
title: "Inverse option generation: Inferences about others’ values based on what comes to mind"
---

```{r setup, include=FALSE}

rm(list=ls())
knitr::opts_chunk$set(echo = TRUE)

blackGreyPalette <- c("#2C3539", "#999999") 
se <- function(x) {sd(x)/sqrt(length(x))}

options(scipen=999)

library(tidyverse)
library(lme4)
library(lsr)
library(effsize)
library(tinytex)
library(ggdist)


knitr::opts_knit$set(root.dir = dirname(rstudioapi::getActiveDocumentContext()$path))
```


## Study 1: Inferred values of ordinary category members based on what comes to mind

### 1b: Full dataset

#### Participants 
```{r study 1 participants, include=F, echo=FALSE}
data1 <- read.csv("data/ToCS_study1ratings.csv", stringsAsFactors = F)

data1 <- data1[-c(1),]
data1 <- data1 %>% filter(CompCheck == "2") #2 is the correct answer

demo7_cols <- c("ResponseId","age","gender","race")
d_demo7 <- data1[,demo7_cols]

length(d_demo7$ResponseId) #N
table(d_demo7$gender) #Gender (male=1)
table(d_demo7$race) #Race
d_demo7$age <- as.numeric(d_demo7$age)
mean(d_demo7$age,na.rm=T) #Mean Age
sd(d_demo7$age,na.rm=T) #SD Age

```

#### Results
``` {r cleaning data set 1, include=F}

data1long <- data1 %>% pivot_longer(names_to="questioncode",values_to="response",cols=20:959) %>% filter(response != "")

data1recoded <- data1long %>%  
  mutate(condition = case_when(
    str_detect(questioncode,"like") ~ "Like",
    str_detect(questioncode,"good") ~ "Good"))

#removing the condition from question code
data1recoded$questioncode <- substr(data1recoded$questioncode, 5, nchar(data1recoded$questioncode)) 

data1recoded$cs <- sub("_(.*)", "", data1recoded$questioncode)

#labeling generation number 
data1recoded <- data1recoded %>%
  group_by(condition, cs, ResponseId) %>%
  mutate(gennum = row_number())

data1recoded$response <- as.numeric(data1recoded$response)

# mean, sd, n, and se, based on condition ("good" vs "like"), choice set (from Mills' participants), and generation number in the choice set
data1recoded.sum <- data1recoded %>% group_by(gennum,cs, condition) %>% 
                  summarise(
                    mean = mean(as.numeric(response),na.rm=T),
                    sd = sd(response,na.rm=T),
                    n = length(response),
                    se = sd/sqrt(n)
                  )

# calculate the median goodness of fit response
fitmedian <- median((data1recoded$response[data1recoded$condition == "Good"]))

#median split for context specific value (csv)
highcsv <- data1recoded[data1recoded$condition == "Good" & data1recoded$response > fitmedian,] #N=123
lowcsv <- data1recoded[data1recoded$condition == "Good" & data1recoded$response <= fitmedian,] #N=132

#label all goodness of fit responses with high or low context-specific-value
data1recoded$csv[data1recoded$response > fitmedian & data1recoded$condition == "Good"] <- "High"
data1recoded$csv[data1recoded$response <= fitmedian & data1recoded$condition == "Good"] <- "Low"

# bringing over the csv of each option next to the inferred value rating to use in an analysis later
data1recoded <- data1recoded %>%
  group_by(cs, gennum, ResponseId) %>%
  mutate(csv = ifelse(condition == "Like", first(csv[condition == "Good"]), csv))

#removed choice set 34 because original data did not have responses for this one -- participant opted to see "aquarium" which is not an individual animal
data1recoded <- data1recoded[data1recoded$cs != 34, ] 

#removed 29 and 94 from data because choice sets only included the selected response (no additional options generated). 31 already removed before data collection.
data1recoded <- data1recoded[data1recoded$cs != 29, ] 
data1recoded <- data1recoded[data1recoded$cs != 94, ] 

```

``` {r study 7 ANOVA for high and low prob and csv on inferred value, include=F}
data1responses <- read.csv("data/ToCS_study1sets.csv")

#dropping 29, 31, and 94 from data because choice sets only included the selected response (no additional options generated)
data1responses <- data1responses[data1responses$cs != 29, ] 
data1responses <- data1responses[data1responses$cs != 31, ] 
data1responses <- data1responses[data1responses$cs != 94, ] 

# removing the blank NA column
data1responses <- na.omit(data1responses)

# initiating generation number and animal columns
data1responses$gennum <- NA
data1responses$animal <- NA

# Deleting the unnecessary response time data to make my for loop easier
data1responses <- subset(data1responses, select = -considerations_rt)

# pivot longer
data1responses_long <- data1responses %>%
  pivot_longer(
    cols = starts_with("considerations"))

for (i in 1:nrow(data1responses_long)) {
  # extracting the number from the "name" column
  num <- as.numeric(gsub("considerations", "", data1responses_long[i, "name"]))
  # assigning values to gennum and animal based on the extracted number
  data1responses_long[i, "gennum"] <- num + 1
}

data1responses_long <- data1responses_long %>%
  mutate(animal = value)

data1responses_long <- data1responses_long %>%
  select(-name, -value)

# omit rows where "animal" contains an empty string
data1responses_long <- data1responses_long %>%
  filter(nzchar(animal))

data1responses_long_counts <- data1responses_long %>%
  count(animal, name = "count")

data1responses_long <- merge(data1responses_long, data1responses_long_counts, by = "animal")

totalNoptions <- sum(data1responses_long$count)

data1responses_long$prob <- data1responses_long$count / totalNoptions

probmedian <- median(data1responses_long$prob)

# median split
highprob <- data1responses_long[data1responses_long$prob > probmedian,] #N=432
lowprob <- data1responses_long[data1responses_long$prob <= probmedian,] #N=462

# removing unnecessary columns
data1responses_long <- data1responses_long %>%
  select(-answer, -answer_rt, -info, -info_rt, -count, -subject_id)

# folding in the probability data
data1responses_long <- merge(data1responses_long, data1recoded, by = c("cs", "gennum"))

# create high and low probability median split
data1responses_long$probsplit[data1responses_long$prob > probmedian] <- "High"

data1responses_long$probsplit[data1responses_long$prob <= probmedian] <- "Low"

# same ANOVA results, whether done this way or by creating a new dataframe with only "like" ratings
inferredvalueanova <- aov(response ~ probsplit * csv, data = data1responses_long %>% filter(condition == "Like"))
summary(inferredvalueanova)

# t-test for high and low context specific value
ttest_csv <- t.test(response ~ csv, data = data1responses_long)
print(ttest_csv)

# t-test for high and low probability of coming to mind
ttest_probsplit <- t.test(response ~ probsplit, data = data1responses_long)
print(ttest_probsplit)

```

``` {r study 7 cont, include=F}
# pivoting wider so that csv and inferred value are their own columns rather than contained separately under the column condition (with "Like" or "Good")

data1responses_wide <- data1responses_long %>%
  pivot_wider(names_from = condition, values_from = response)

data1responses_wide <- data1responses_wide %>%
  group_by(cs, csv, gennum, animal, ResponseId, prob, probsplit) %>%
  summarise(
    Like = coalesce(na.omit(Like), na.omit(Good)),
    Good = coalesce(na.omit(Good), na.omit(Like))
  ) %>%
  ungroup()

```

``` {r study 7 building a model, include=F}
data1likingratings <- read.csv("data/ToCS_study1genval.csv")

#preparing this data to combine with existing data
new_column_names <- sub("_51", "", colnames(data1likingratings))
colnames(data1likingratings) <- new_column_names

colnames(data1likingratings)[colnames(data1likingratings) == "redpanda"] <- "red panda"
colnames(data1likingratings)[colnames(data1likingratings) == "grizzlybear"] <- "grizzly bear"

data1likingratings_long <- data1likingratings %>%
  pivot_longer(cols = 19:84, names_to = "animal", values_to = "response") %>%
  filter(!is.na(response))

data1generalvalue <- data1likingratings_long %>%
  group_by(animal) %>%
  summarise(mean_response = mean(response, na.rm = TRUE)) %>%
  ungroup()

# just to know which animals need to be renamed to match
animals_diff <- setdiff(unique(data1responses_wide$animal), unique(data1generalvalue$animal))
animals_diff2 <- setdiff(unique(data1generalvalue$animal), unique(data1responses_wide$animal))

# renaming matching animals to prepare to combine dfs
data1generalvalue <- data1generalvalue %>%
  mutate(animal = ifelse(animal == "bigcats", "big cats", animal))
data1generalvalue <- data1generalvalue %>%
  mutate(animal = ifelse(animal == "bug", "bugs", animal))
data1generalvalue <- data1generalvalue %>%
  mutate(animal = ifelse(animal == "greatape", "great apes", animal))
data1generalvalue <- data1generalvalue %>%
  mutate(animal = ifelse(animal == "polarbear", "polar bear", animal))
data1generalvalue <- data1generalvalue %>%
  mutate(animal = ifelse(animal == "warthog", "warthogs", animal))
data1generalvalue <- data1generalvalue %>%
  mutate(animal = ifelse(animal == "prairiedog", "prairie dog", animal))
data1generalvalue <- data1generalvalue %>%
  mutate(animal = ifelse(animal == "pettingzoo", "petting zoo", animal))
data1generalvalue <- data1generalvalue %>%
  mutate(animal = ifelse(animal == "wildboar", "wild boar", animal))

# remove okapi because it was selected but not generated as part of the general choice set
data1generalvalue <- data1generalvalue %>%
  filter(animal != "okapi")

# combining the data
data1all <- merge(data1responses_wide, data1generalvalue, by = "animal", all.x = TRUE)

colnames(data1all)[colnames(data1all) == "mean_response"] <- "genval"

data1all$valdiff <-  data1all$Like - data1all$genval

# model
lmstudy7_1 <- lmer(valdiff~scale(prob)*scale(Good) + (1|cs) + (scale(prob)*scale(Good)|ResponseId), data=data1all) #full
lmstudy7_2 <- lmer(valdiff~scale(prob)+scale(Good) + (1|cs) + (scale(prob)*scale(Good)|ResponseId), data=data1all) #simplified
lmanova_study7 <- anova(lmstudy7_1, lmstudy7_2)
print(lmanova_study7) #p<.001

# main effect of prob?
lmstudy7_3 <- lmer(valdiff~scale(Good) + (1|cs) + 
(scale(prob)*scale(Good)|ResponseId), data=data1all) 

# yes main effect of prob
anova(lmstudy7_3, lmstudy7_2)


# main effect of context-specific goodness?
lmstudy7_4 <- lmer(valdiff~scale(prob) + (1|cs) + (scale(prob)*scale(Good)|ResponseId), data=data1all) 

# yes main effect of context-specific value
anova(lmstudy7_4, lmstudy7_2)

data1likegenvaldf <- data1all %>%
  group_by(animal) %>%
  summarise(Like_avg = mean(Like, na.rm = TRUE),
            genval_avg = mean(genval, na.rm = TRUE))

# testing whether people inferred that people liked the animals that came to mind above the scale midpoint. Note that value inferences are collapsed across choice-set to account for non-independence.
t.test(data1likegenvaldf$Like_avg, mu = 50)

# comparing general value (how much do people like this animal in general) and inferred value (how much does this person like this animal)
t.test(data1likegenvaldf$Like_avg, data1likegenvaldf$genval_avg, paired = T)

```

```{r}
# running it to just predict liking (not the difference), with general value as an additional regressor

lmstudy7_1a <- lmer(Like ~ scale(prob) * scale(Good) + scale(genval) + 
                   (scale(prob) * scale(Good) + scale(genval)| cs) + (1 | ResponseId), data = data1all,
                   control=lmerControl(optimizer="bobyqa"))

lmstudy7_2a <- lmer(Like ~ scale(prob) + scale(Good) + scale(genval) + 
                   (scale(prob) * scale(Good) + scale(genval)| cs) + (1 | ResponseId), data = data1all,
                   control=lmerControl(optimizer="bobyqa"))

anova(lmstudy7_1a, lmstudy7_2a)

```

```{r study 7 plots, include=F}

# median goodness of fit response
fitmedian <- median(data1all$Good)

#median split for context specific value (csv)
highcsv <- data1all[data1all$Good > fitmedian,] 
lowcsv <- data1all[data1all$Good <= fitmedian,] 

#label all goodness of fit responses with high or low context-specific-value
data1all$csvsplit[data1all$Good > fitmedian] <- "High"
data1all$csvsplit[data1all$Good <= fitmedian] <- "Low"

```

```{r new study 7 plots, include=F}

# redoing the med splits, this time with half of the animals as "high" csv and the other half "low"
data1animalcsv <- data1all %>%
  group_by(animal) %>%
  summarise(avg_good = mean(Good, na.rm = TRUE))

# calculate the mean csv score
mean_avg_good <- mean(data1animalcsv$avg_good, na.rm = TRUE)

# high and low csv
data1animalcsv$avggoodsplit <- ifelse(data1animalcsv$avg_good > mean_avg_good, "High", "Low")

data1all <- merge(data1all, data1animalcsv, by = "animal")

# sanity check
countlowcsv <- table(data1all$avggoodsplit)["Low"]
counthighcsv <- table(data1all$avggoodsplit)["High"]
print(countlowcsv) #N=2975 (2684 if using mean)
print(counthighcsv) #N=1374 (1665 if using mean)

# redoing the prob splits, this time with half of the animals as "high" csv and the other half "low"
data1animalprob <- data1all %>%
  group_by(animal) %>%
  summarise(avg_prob = mean(prob, na.rm = TRUE))

# calculate the median prob score
med_avg_prob <- mean(data1animalprob$avg_prob, na.rm = TRUE)

# high and low csv
data1animalprob$avgprobsplit <- ifelse(data1animalprob$avg_prob > med_avg_prob, "High", "Low")

data1all <- merge(data1all, data1animalprob, by = "animal")

inferredvaluetable <- data1all %>%
  group_by(avgprobsplit,avggoodsplit) %>%
  summarise(
    mean_valdiff = mean(valdiff, na.rm = TRUE),
    standard_error = sd(valdiff, na.rm = TRUE) / sqrt(n())
  )

## doing a simpler test of whether or not coming to mind was predictive of higher than out of context value
# animalSums <- group_by(animal) |>
#   summarize(inferredVal = mean())

``` 

``` {r inferred value plot, include = T}
inferredvalueplot <- ggplot(inferredvaluetable, aes(x = avgprobsplit, y = mean_valdiff, fill = avggoodsplit)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_errorbar(aes(ymin = mean_valdiff - standard_error, ymax = mean_valdiff + standard_error),
                position = position_dodge(width = 0.9),
                width = 0.25) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "black") +  
  labs(x = "Frequency",
       y = "Inference of how much more than average the animal is liked",
       fill = "context specific\nutility") +  
  scale_fill_manual(values = c("High" = "grey70", "Low" = "grey40")) +  
  scale_y_continuous(limits = c(-5, 10), breaks = c(-5, 0, 5, 10)) + 
  theme(
    panel.background = element_rect(fill = "white", color = NA),  
    plot.background = element_rect(fill = "white", color = NA),
    legend.background = element_rect(fill = "white", color = NA),
    legend.key = element_rect(fill = "white", color = NA),
    axis.line = element_line(color = "black"),  
    axis.ticks = element_line(color = "black"),  
    axis.title = element_text(size = 16),  
    axis.text = element_text(size = 14),  
    legend.title = element_text(size = 16), 
    legend.text = element_text(size = 14), 
    text = element_text(color = "black") 
  )

plot(inferredvalueplot)
```

``` {r, include = F}
# making a folder for our figures
dir.create("figures", showWarnings = FALSE)

ggsave("figures/fig2.jpeg", plot = inferredvalueplot, width = 9, height = 8, dpi = 300)
```

### 1a: Partial dataset
```{r}
# first, identifying the 'best' and 'worst' 4 animals to use in Study 1a
worstanimals <- data1all %>% 
  rowwise() %>%
  mutate(min_value = mean(c_across(c(avg_prob, avg_good, genval)), na.rm = TRUE)) %>%
  ungroup() %>%
  arrange(min_value) %>%
  distinct(animal, .keep_all = TRUE) %>%
  slice_head(n = 4) %>%
  select(animal, avg_prob, avg_good, genval, min_value)

bestanimals <- data1all %>% 
  rowwise() %>%
  mutate(max_value = mean(c_across(c(avg_prob, avg_good, genval)), na.rm = TRUE)) %>%
  ungroup() %>%
  arrange(desc(max_value)) %>%
  distinct(animal, .keep_all = TRUE) %>%
  slice_head(n = 4) %>%
  select(animal, avg_prob, avg_good, genval, max_value)
```

#### Participants
```{r study 1a participants, include=F, echo=FALSE}
data1a <- read.csv("data/ToCS_study1a.csv", stringsAsFactors = F)

# data1a <- data1a[-c(1:2),]

demo1a_cols <- c("ResponseId","age","gender","race")
d_demo1a <- data1a[,demo1a_cols]

length(d_demo1a$ResponseId) #N
table(d_demo1a$gender) #Gender (male=1, female=2, nb=3, other=4)
table(d_demo1a$race) #Race (white=1, black=2, latino/a=3, asian=4, other=5)
d_demo1a$age <- as.numeric(d_demo1a$age)
mean(d_demo1a$age,na.rm=T) #Mean Age
sd(d_demo1a$age,na.rm=T) #SD Age

```

```{r study 1a data cleaning, include=F, echo=F}
data1a_long <- data1a %>%
  pivot_longer(cols = wildboar:penguin, names_to = "Animal", values_to = "Rating") %>%
  rowwise() %>%  
  mutate(
    Valence = factor(ifelse(Animal %in% c("koala", "redpanda", "dolphin", "penguin"), "Good", "Bad")),
    Generated = factor(ifelse(Animal %in% c(c_across(c(good1, good2, bad1, bad2))), "Generated", "Not Generated")),
    Generated = factor(Generated, levels=c("Not Generated","Generated")),    
    Animal = factor(Animal),
    Animal = fct_recode(Animal,
                        "Alligator" = "alligator",
                        "Bugs" = "bugs",
                        "Crocodile" = "crocodile",
                        "Dolphin" = "dolphin",
                        "Koala" = "koala",
                        "Penguin" = "penguin",
                        "Red Panda" = "redpanda",
                        "Wild Boar" = "wildboar"),
    Animal = factor(Animal, levels = c("Alligator","Bugs","Crocodile","Dolphin",
                                       "Koala", "Penguin","Red Panda", "Wild Boar"))
  ) %>%
  ungroup() 
```

```{r, include=F, echo=F}
data1a_sum <- data1a_long %>%
  group_by(Animal, Generated) %>%
    summarise(mean = mean(Rating, na.rm=TRUE),  
            sd = sd(Rating, na.rm = TRUE),
            n = n(),
            se = sd / sqrt(n))
```

```{r study 1a anova}

lm1a.0 <- lmer(Rating ~ Generated + Valence + (Generated|Animal) + (Generated|ResponseId), data = data1a_long)

lm1a.1 <- lmer(Rating ~ Generated + (Generated|Animal) + (Generated|ResponseId), data = data1a_long)

anova(lm1a.0,lm1a.1) ## Test of the effect of being a good animal (i.e., valence)

lm1a.2 <- lmer(Rating ~ Valence + (Generated|Animal) + (Generated|ResponseId), data = data1a_long)

anova(lm1a.0,lm1a.1) ## Test of the effect of having been generated

```

```{r 1a bar plot, include=F, echo=F}
# data1a_sum <- data1a_long %>%
#   group_by(Valence, Mentioned) %>%
#   summarise(mean = mean(Rating, na.rm=TRUE),  
#             sd = sd(Rating, na.rm = TRUE),
#             n = n(),
#             se = sd / sqrt(n))

study1abarplot <- ggplot(data1a_sum, aes(x = Animal, y = mean, fill = Generated)) +
  geom_bar(stat = "identity", position = position_dodge(width = 0.9)) +
  geom_errorbar(aes(ymin = mean - se, ymax = mean + se), 
                position = position_dodge(width = 0.9),  
                width = 0.2) +  
  labs(title = "",
       x = "",
       y = "Inferred General Value") +
  scale_fill_manual(values = c("Not Generated" = "grey40","Generated" = "grey70")) +
  coord_cartesian(ylim = c(0, 100)) +
  theme_minimal(base_size = 16) +
  theme(panel.grid = element_blank(),
        axis.line = element_line(),
        axis.text.x = element_text(color = "black", angle = 45, vjust=.5))

study1abarplot

```

```{r}
ggsave("figures/study1abar.jpeg", plot = study1abarplot, width = 7, height = 5, dpi = 600)
```

## Study 2: Inverted Value Inferences

### Study 2a
#### Participants
```{r study 5v2 participants, include=F, echo=FALSE}
data5v2 <- read.csv("data/ToCS_study2newratings.csv", stringsAsFactors = F)

data5v2 <- data5v2[-c(1:2),]

demo5v2_cols <- c("ResponseId","age","gender","race")
d_demo5v2 <- data5v2[,demo5v2_cols]

length(d_demo5v2$ResponseId) #N
table(d_demo5v2$gender) #Gender (male=1)
table(d_demo5v2$race) #Race
d_demo5v2$age <- as.numeric(d_demo5v2$age)
mean(d_demo5v2$age,na.rm=T) #Mean Age
sd(d_demo5v2$age,na.rm=T) #SD Age

```

#### Results
```{r study 5v2, echo=FALSE, include=F, warning=FALSE}

exclude <- names(data5v2)[grep("cs_",names(data5v2))] #remove click data columns
exclude2 <- names(data5v2)[grep("Q",names(data5v2))] #remove more click data
data5v2 <- data5v2 %>% select(-c(exclude,exclude2))
data5v2 <- data5v2 %>% select(-c(1:8,10:22,1642:1646))

data5v2long <- data5v2 %>% pivot_longer(names_to="questioncode",values_to="response",cols=2:1619) %>% filter(response != "")

# label option number and condition in choice set
data5v2recoded <- data5v2long %>%  
  mutate(
    optionnum = case_when(
      str_detect(questioncode,"_1") ~ 1,
      str_detect(questioncode,"_2") ~ 2,
      str_detect(questioncode,"_3") ~ 3,
      str_detect(questioncode,"_4") ~ 4,
      str_detect(questioncode,"_5") ~ 5,
      str_detect(questioncode,"_6") ~ 6,
      str_detect(questioncode,"_7") ~ 7,
      str_detect(questioncode,"_8") ~ 8
    ),
    condition = case_when(
      str_detect(questioncode,"like") ~ "Like",
      str_detect(questioncode,"good") ~ "Good"))

# cleaning up the column names to not say 'good' or 'like'
colnames(data5v2recoded) <-  gsub("good","",colnames(data5v2recoded)) # remove the word good
colnames(data5v2recoded) <-  gsub("like","",colnames(data5v2recoded)) # remove the word like

# cleaning up the questioncode column data
data5v2recoded$questioncode <-  gsub("good","",data5v2recoded$questioncode) # remove the word good
data5v2recoded$questioncode <-  gsub("like","",data5v2recoded$questioncode) # remove the word like

# remove the option number suffix
data5v2recoded$questioncode <- gsub("_[1-9]", "", data5v2recoded$questioncode)
 
data5v2.sum <- data5v2recoded %>%
  group_by(optionnum, condition) %>%
  summarise(
    mean = mean(as.numeric(response), na.rm = TRUE),
    sd = sd(response, na.rm = TRUE),
    n = n(),
    se = sd / sqrt(n)
  )

data5v2.sum_questioncode <- data5v2recoded %>% group_by(optionnum,condition,questioncode) %>%
summarise(
mean = mean(as.numeric(response),na.rm=T),
sd = sd(response,na.rm=T),
n = length(response),
se = sd/sqrt(n)) %>% mutate(questioncode=as.numeric(questioncode))
                  

```

```{r study 5v2 plot, include=T, echo=FALSE, warning=FALSE}
data5v2.plot <- data5v2.sum %>%
  filter(condition=="Like") %>%
  ggplot(aes(y=mean,x=optionnum,color=optionnum)) +
  geom_point(stat = "identity")+
  geom_line() +
  ylab("Like estimates") +
  xlab("Option number") +
  geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.1) +
  theme_bw() +
  theme(
    plot.background = element_blank()
    ,panel.grid.major = element_blank()
    ,panel.grid.minor = element_blank()
    ,legend.title=element_blank()
    ,legend.text=element_text(size=rel(2))
    ,legend.justification=c(1,1)
    ,axis.text.y=element_text(size=rel(1.4))
    ,axis.title.y=element_text(size=rel(1.3),vjust=.9)
    ,axis.title.x=element_text(size=rel(1.3),vjust=.9)
    ,axis.text.x=element_text(size=rel(1.5))
    ,axis.ticks = element_blank()
    ,legend.position = "none"
  )

plot(data5v2.plot)


```

```{r morris study participants, include=F, echo=FALSE}
morrisdata <- read.csv("data/ToCS_study2originalratings.csv", stringsAsFactors = F)

morrisdata = morrisdata[-1,]

excludemorris1 <- names(morrisdata)[grep("time",names(morrisdata))]
excludemorris2 <- names(morrisdata)[grep("Q159",names(morrisdata))]
excludemorris3 <- names(morrisdata)[grep("cs",names(morrisdata))]
morrisdata <- morrisdata %>% select(-c(2:20,excludemorris1,excludemorris2,excludemorris3))
colnames(morrisdata)[1] ="questioncode"
morrisdata$questioncode <- as.numeric(morrisdata$questioncode)

# removing option 1
option1 <- names(morrisdata)[grep("_1",names(morrisdata))]
morrisdata <- morrisdata %>% select(-c(option1))

#change cols to 19 if adding option 1 back in
morrisdatalong <- morrisdata %>% pivot_longer(names_to="question",values_to="response",cols=2:17) %>% filter(response != "")

morrisdatarecoded <- morrisdatalong %>% 
  mutate(condition = case_when(
    str_detect(question,"val_q") ~ "Like",
    str_detect(question,"val_other") ~ "Good"),
    optionnum = case_when(
      str_detect(question,"2") ~ 1,
      str_detect(question,"3") ~ 2,
      str_detect(question,"4") ~ 3,
      str_detect(question,"5") ~ 4,
      str_detect(question,"6") ~ 5,
      str_detect(question,"7") ~ 6,
      str_detect(question,"8") ~ 7,
      str_detect(question,"9") ~ 8
    ))

morrisdatarecoded$response <- as.numeric(morrisdatarecoded$response)

#testing whether the order in which an option came to mind was predictive of how much that option was “generally liked” by the person who came up with it
orderpredictsliking_lm <- lmerTest::lmer(as.numeric(response) ~ as.numeric(optionnum) + (as.numeric(optionnum)|questioncode), data = morrisdatarecoded %>% filter(condition == "Like"))

summary(orderpredictsliking_lm) # t(112.45) = -2.192, p < .05

morrisdata.sum <- morrisdatarecoded %>% group_by(optionnum,condition) %>%
                  summarise(
                    mean = mean(as.numeric(response),na.rm=T),
                    sd = sd(response,na.rm=T),
                    n = length(response),
                    se = sd/sqrt(n)
                  )

combineddata <- morrisdatarecoded %>%
  left_join(data5v2.sum_questioncode, by = c("optionnum", "condition", "questioncode")) %>%
  drop_na()

cor.test(as.numeric(combineddata$response[combineddata$condition=="Good"]),
                    combineddata$mean[combineddata$condition=="Good"])
cor.test(as.numeric(combineddata$response[combineddata$condition=="Like"]),
                    combineddata$mean[combineddata$condition=="Like"])

# regardless of option order, can participants infer which options were more liked than others? 
lmCom_0 <- lmerTest::lmer(as.numeric(response) ~ mean + optionnum + (mean|questioncode), data=combineddata %>% filter(condition == "Like"))

# yes, participants’ inferred preference ratings linearly predicted prior participants’ actual preference rating.
summary(lmCom_0) #t(749.73) = 7.867, p < .001


```

``` {r, include=F}
# we want to predict how much prior participants liked particular options that came to mind. We found that this was the case: participants’ inferred preference ratings linearly predicted prior participants actual preference rating.

morrisdatarecoded$questioncode <- as.numeric(morrisdatarecoded$questioncode)
data5v2recoded$questioncode <- as.numeric(data5v2recoded$questioncode)

combineddata2 <- morrisdatarecoded %>% left_join(data5v2recoded, by=c("optionnum","condition","questioncode")) %>%
  drop_na()

names(combineddata2)[names(combineddata2) == "response.y"] <- "inferredrating"
combineddata2$inferredrating <- as.numeric(combineddata2$inferredrating)

names(combineddata2)[names(combineddata2) == "response.x"] <- "initialrating"
combineddata2$initialrating <- as.numeric(combineddata2$initialrating)

combineddata2_likeonly <- combineddata2[combineddata2$condition == "Like", ]

combineddata2_likeonly$ResponseId <- as.factor(combineddata2_likeonly$ResponseId)

### Does option order matter above and beyond initial rating?
lm2.0 <- lmerTest::lmer(inferredrating ~ optionnum + initialrating + (1|ResponseId) + (optionnum|questioncode), data = combineddata2_likeonly)
summary(lm2.0)  #yes, t(89.59) = -6.767, p < .001

```

```{r morris study plot, include=F, echo=FALSE}
# No options were inferred on average to be liked less than 2.4, with all but 2 (i.e., 2.4 and 2.9) above a mean score of 3. This explains the x-axis displacement for Like ratings.

combinedCorrelation <- combineddata %>% 
  ggplot(aes(y=as.numeric(response),x=mean,color=condition)) +
  geom_point(stat = "identity",position = position_jitter())+
  geom_smooth(method=lm) +
  ggtitle("Correlation between original ratings and inferred value") +
  ylab("original rating") +
  xlab("inferred value") +
  facet_grid(~condition) +
  theme_bw() +
  theme(
    plot.background = element_blank()
    ,panel.grid.major = element_blank()
    ,panel.grid.minor = element_blank()
    ,legend.title=element_blank()
    ,legend.text=element_text(size=rel(2))
    ,legend.justification=c(1,1) 
    ,axis.text.y=element_text(size=rel(1.4))
    ,axis.title.y=element_text(size=rel(1.3),vjust=.9)
    ,axis.title.x=element_text(size=rel(1.3),vjust=.9)
    ,axis.text.x=element_text(size=rel(1.5))
    ,axis.ticks = element_blank()
    ,legend.position = "none"
  )

plot(combinedCorrelation)
```

```{r morris study plot 2, echo=FALSE, include=F, warning=FALSE}

morrisdata.corplot <- morrisdata.sum %>% 
  filter(condition=="Like") %>%
  ggplot(aes(y=mean,x=optionnum,fill=optionnum)) +
  geom_point(stat = "identity")+
  geom_line() +
  ggtitle("Correlation between like ratings and option number") +
  ylab("Liking rating") +
  xlab("Option number") +
  geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.1, position=position_dodge(.9)) +
  # facet_grid(~optionnum) +
  theme_bw() +
  theme(
    plot.background = element_blank()
    ,panel.grid.major = element_blank()
    ,panel.grid.minor = element_blank()
    ,legend.title=element_blank()
    ,legend.text=element_text(size=rel(2))
    ,legend.justification=c(1,1) 
    ,axis.text.y=element_text(size=rel(1.4))
    ,axis.title.y=element_text(size=rel(1.3),vjust=.9)
    ,axis.title.x=element_text(size=rel(1.3),vjust=.9)
    ,axis.text.x=element_text(size=rel(1.5))
    ,axis.ticks = element_blank()
    ,legend.position = "none"
  )

plot(morrisdata.corplot)

```

``` {r combined morris and new data plot, include=T}

# source columns to differentiate the dfs
data5v2.sum <- data5v2.sum %>% mutate(source = "data5")
morrisdata.sum <- morrisdata.sum %>% mutate(source = "morris")

# combine the dfs
study5combined <- bind_rows(data5v2.sum, morrisdata.sum)

# filter the combined data
study5combined_filtered <- study5combined %>% filter(condition == "Like")

# create the combined plot
study5combined_plot <- ggplot(study5combined_filtered, aes(x = optionnum, y = mean, color = source, group = source)) +
  geom_point(stat = "identity", position = position_dodge(width = 0.1)) +
  geom_line(color = "grey50", position = position_dodge(width = 0.1)) +
  scale_color_manual(values = c("black", "grey50"), 
                     labels = c("inferred preference", "actual preference")) +
  ggtitle("Correlation between like ratings and option number") +
  ylab("How much a food is liked") +
  xlab("Option number") +
  geom_errorbar(aes(ymin = mean - se, ymax = mean + se), width = .1, position = position_dodge(0.1)) +
  scale_x_continuous(breaks = 1:8) +
  scale_y_continuous(limits = c(3, 7)) +
  theme_bw() +
  theme(
    plot.background = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    legend.title = element_blank(),
    legend.text = element_text(size = rel(1.2)),
    legend.justification = c(0, 1),  # Adjust justification to move legend to the bottom-left
    legend.position = c(0.05, 0.35),  # Adjust position coordinates for fine-tuning
    axis.text.y = element_text(size = rel(1.4)),
    axis.title.y = element_text(size = rel(1.3), vjust = .9),
    axis.title.x = element_text(size = rel(1.3), vjust = .9),
    axis.text.x = element_text(size = rel(1.5)),
    axis.ticks = element_blank()
  )

plot(study5combined_plot)

```


### Study 2b
#### Participants
```{r study 2b participants, include=F, echo=FALSE}
data2b <- read.csv("data/ToCS_study2b.csv", stringsAsFactors = F)

data2b <- data2b[-c(1:2),]
data2b$age <- as.numeric(data2b$age)

demo2b_cols <- c("age","gender","gender_4_TEXT", "race", "race_5_TEXT")
d_demo2b <- data2b[,demo2b_cols]

length(data2b$ResponseId) #N
table(d_demo2b$gender) #Gender (male=1)
table(d_demo2b$race) #Race (white = 1, black = 2, latino/a = 3, asian = 4, other = 5)
mean(d_demo2b$age,na.rm=T) #Mean Age
sd(d_demo2b$age,na.rm=T) #SD Age

```

#### Results
```{r study 5v2, echo=FALSE, include=F, warning=FALSE}
exclude2b_1 <- names(data2b)[grep("cs_",names(data2b))] #remove click data columns
exclude2b_2 <- names(data2b)[grep("Q",names(data2b))] #remove more click data
data2b <- data2b %>% select(-c(exclude2b_1,exclude2b_2,id,FL_5_DO,demo2b_cols))
data2b <- data2b %>% select(-c(1:8,10:22))

# identify columns that end with '_DO'
DOcols <- grep("_DO$", names(data2b), value = TRUE)

# remove 'like' from these columns for ease of data cleaning
names(data2b)[names(data2b) %in% DOcols] <- gsub("like", "", names(data2b)[names(data2b) %in% DOcols])

# splitting the dfs because it's too hard to parse otherwise
# first, dealing with the value ratings (both context-specific and general value)
data2b_value <- data2b %>%
  select(ResponseId, starts_with("like"), starts_with("good"))

data2b_value_long <- data2b_value %>% pivot_longer(names_to="questioncode",values_to="response",cols=2:1617) %>% filter(response != "")

data2b_value_long$response <- as.numeric(as.character(data2b_value_long$response))

# label option number and condition in choice set
data2b_value_recoded <- data2b_value_long %>%  
  mutate(
    optionnum = case_when(
      str_detect(questioncode,"_1") ~ 1,
      str_detect(questioncode,"_2") ~ 2,
      str_detect(questioncode,"_3") ~ 3,
      str_detect(questioncode,"_4") ~ 4,
      str_detect(questioncode,"_5") ~ 5,
      str_detect(questioncode,"_6") ~ 6,
      str_detect(questioncode,"_7") ~ 7,
      str_detect(questioncode,"_8") ~ 8
    ),
    condition = case_when(
      str_detect(questioncode,"like") ~ "Like",
      str_detect(questioncode,"good") ~ "Good"))

# cleaning up the column names to not say 'good' or 'like'
colnames(data2b_value_recoded) <-  gsub("good","",colnames(data2b_value_recoded)) # remove the word good
colnames(data2b_value_recoded) <-  gsub("like","",colnames(data2b_value_recoded)) # remove the word like

# cleaning up the questioncode column data
data2b_value_recoded$questioncode <-  gsub("good","",data2b_value_recoded$questioncode) # remove the word good
data2b_value_recoded$questioncode <-  gsub("like","",data2b_value_recoded$questioncode) # remove the word like

# remove the option number suffix
data2b_value_recoded$questioncode <- gsub("_[1-9]", "", data2b_value_recoded$questioncode)
```

```{r}
# now, dealing with the messy order data
data2b_order <- data2b %>%
  select(ResponseId, ends_with("_DO"))

data2b_order_long <- data2b_order %>%
  pivot_longer(
    cols = ends_with("_DO"), 
    names_to = "questioncode", 
    values_to = "order"
  ) %>%
  mutate(
    questioncode = sub("_DO$", "", questioncode) # extracting the question number
  ) %>%
  filter(order != "") %>% # dropping all rows that people didn't respond to!
  separate_rows(order, sep = "\\|") %>% # splitting 'order' into multiple rows
  group_by(ResponseId, questioncode) %>% # grouping by id and question number
  mutate(
    vieworder = row_number(), # create a column 'vieworder' as sequential numbers
    optionnum = as.integer(order) # converting 'order' values to integers for 'optionnum'
  ) %>%
  ungroup() %>%
  select(-order) # dropping the original 'order' column since no longer needed

```

```{r}
data2b_combined <- data2b_value_recoded %>%
  inner_join(data2b_order_long, by = c("ResponseId", "questioncode", "optionnum"))
```

```{r}
data2b.sum <- data2b_combined %>%
  group_by(vieworder, condition) %>%
  summarise(
    mean = mean(as.numeric(response), na.rm = TRUE),
    sd = sd(response, na.rm = TRUE),
    n = n(),
    se = sd / sqrt(n)
  )

data2b.sum_like <- data2b.sum %>% 
  filter(condition == "Like")

data2b.sum_questioncode <- data2b_combined %>% group_by(optionnum,vieworder,condition,questioncode) %>%
summarise(
mean = mean(as.numeric(response),na.rm=T),
sd = sd(response,na.rm=T),
n = length(response),
se = sd/sqrt(n)) %>% mutate(questioncode=as.numeric(questioncode))
```

```{r}
# avoiding confusion over which ratings were the original participant ratings
morrisdatarecoded <- morrisdatarecoded %>%
  rename(`initialrating` = response)

morrisdatarecoded <- morrisdatarecoded %>%
  mutate(questioncode = as.character(questioncode)) 

data2b_full <- morrisdatarecoded %>%
  inner_join(data2b_combined, by = c("questioncode", "optionnum", "condition"))

# creating a df with only general value
data2b_full_like <- data2b_full %>% 
  filter(condition == "Like")

# dropping choice sets with only 1 item -- from 195 to 187
# I see there was never a set 72...
data2b_full_like_filtered1 <- data2b_full_like %>%
  group_by(ResponseId, questioncode) %>%
  filter(n_distinct(optionnum) > 1) %>%
  ungroup()

# dropping choice sets with <4 items -- from 195 to 106
data2b_full_like_filtered4 <- data2b_full_like %>%
  group_by(ResponseId, questioncode) %>%
  filter(n_distinct(optionnum) > 3) %>%
  ungroup()

### Does option order matter above and beyond initial rating?
# failed to converge
# lmstudy2b <- lmerTest::lmer(response ~ optionnum + initialrating + (1|ResponseId) + (optionnum|questioncode), data = data2b_full_like_filtered1, control=lmerControl(optimizer="bobyqa"))

# removed the correlation term
lmstudy2b <- lmerTest::lmer(response ~ optionnum + initialrating + (1|ResponseId) + (1|questioncode), data = data2b_full_like_filtered1)

# running with choice sets with >3 items
# this time it converges: t(58.61)=-1.912, p=.0607
# lmstudy2b <- lmerTest::lmer(response ~ optionnum + initialrating + (1|ResponseId) + (optionnum|questioncode), data = data2b_full_like_filtered4)

# averaging across all variance, we see the pattern we expect
# t(6)=-4.8334, p=.0029, correlation = -.892
cor.test(data2b.sum_like$vieworder,data2b.sum_like$mean)

```
```{r plot 2b}
# filter the combined data
data2b.sum_like <- data2b.sum %>% filter(condition == "Like")

# create the combined plot
data2bplot <- ggplot(data2b.sum_like, aes(x = vieworder, y = mean)) +
  geom_point(stat = "identity", position = position_dodge(width = 0.1)) +
  geom_line(color = "grey50", position = position_dodge(width = 0.1)) +
  ggtitle("Correlation between like ratings and viewed order") +
  ylab("Inference of how much a food is liked") +
  xlab("Viewing order") +
  geom_errorbar(aes(ymin = mean - se, ymax = mean + se), width = .1, position = position_dodge(0.1)) +
  scale_x_continuous(breaks = 1:8) +
  scale_y_continuous(limits = c(3, 7)) +
  theme_bw() +
  theme(
    plot.background = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    legend.title = element_blank(),
    legend.text = element_text(size = rel(1.2)),
    legend.justification = c(0, 1), 
    legend.position = c(0.05, 0.35),
    axis.text.y = element_text(size = rel(1.4)),
    axis.title.y = element_text(size = rel(1.3), vjust = .9),
    axis.title.x = element_text(size = rel(1.3), vjust = .9),
    axis.text.x = element_text(size = rel(1.5)),
    axis.ticks = element_blank()
  )

plot(data2bplot)

ggsave("figures/study2bplot.jpeg", plot = data2bplot, device = "jpeg", width = 8, height = 5.5, units = "in")

```

## Study 3: Character inferences based on immoral and bizarre options

### 3a: Immoral Thoughts

#### Participants
```{r participants immoral,include=FALSE,echo=FALSE}

d3a<- read.csv("data/ToCS_study3a.csv",stringsAsFactors = F)

demo3a_cols <- c("ResponseId","age","gender","race")
d_demo3a <- d3a[,demo3a_cols]

length(d_demo3a$ResponseId) #N
table(d_demo3a$gender) #Gender
table(d_demo3a$race) #Race
mean(d_demo3a$age,na.rm=T) #Mean Age
sd(d_demo3a$age,na.rm=T) #SD Age

```

#### Results
```{r exp3a results 1, echo=FALSE, include=FALSE, warning=FALSE}

data3a <- d3a[c(9,19:31)]

colnames(data3a)<-c("Subject","PencilImmoral","PencilNeutral","GunImmoral",
                  "GunNeutral","ScrewdriverImmoral","ScrewdriverNeutral","KnifeImmoral",
                  "KnifeNeutral","LightsImmoral","LightsNeutral","WhipImmoral","WhipNeutral","explanation")

data.long3a<-data3a%>%gather(question,response,-c("Subject","explanation"),na.rm=TRUE) %>%
  filter(response!="") %>%
  mutate(response=as.numeric(response),
         question=factor(question),
         condition=factor(c("Immoral","Neutral","Immoral","Neutral","Immoral","Neutral","Immoral","Neutral","Immoral","Neutral","Immoral","Neutral")[question]),
         item=factor(c("gun","gun","knife","knife","lights","lights","pencil","pencil","screwdriver","screwdriver","whip","whip")[question]),
         itemKind=factor(c("Dangerous","Dangerous","Dangerous","Dangerous","Safe","Safe","Safe","Safe","Safe","Safe","Dangerous","Dangerous")[question])) 


data.sum3a <- data.long3a %>% group_by(condition,itemKind) %>%
  summarise(mean=mean(response, na.rm = TRUE),
            sd=sd(response, na.rm = TRUE),
            n=length(response),
            se=sd/sqrt(n))

```

```{r exp3a results 2, echo=FALSE, include=F, warning=FALSE}
ggplot(data.sum3a,aes(y=mean,x=condition,fill=condition))+
  ggtitle("Character Ratings for Immoral and Neutral Thoughts by Object Type") +
  facet_grid(~itemKind) +
  ylab("Mean Character Rating") +
  xlab("Thought Type") +
  geom_bar(stat="identity",position="dodge")+
  theme_bw() +
  geom_errorbar(aes(ymin=mean-se,ymax=mean+se),width=.1,position=position_dodge(.9))+
  theme(legend.position = "none"
    ,plot.background = element_blank()
    ,panel.grid.major = element_blank()
    ,panel.grid.minor = element_blank()
    ,axis.text.y=element_text(size=rel(1.4))
    ,axis.title.y=element_text(size=rel(1.3),vjust=.9)
    ,axis.title.x=element_blank()
    ,axis.text.x=element_text(size=rel(1.5)))
```

``` {r exp3a results 3, echo=FALSE, include=FALSE, warning=FALSE}

# cannot assume equal variance
var.test(data.long3a$response[data.long3a$condition=="Immoral"],data.long3a$response[data.long3a$condition=="Neutral"]) # p=.017

t.test(data.long3a$response[data.long3a$condition=="Immoral"],data.long3a$response[data.long3a$condition=="Neutral"],var.equal = FALSE) # p=.01

t.test(data.long3a$response[data.long3a$itemKind=="Safe"],data.long3a$response[data.long3a$itemKind=="Dangerous"]) # p<.001

# safe objects
t.test(data.long3a$response[data.long3a$condition=="Immoral"&data.long3a$itemKind=="Safe"],data.long3a$response[data.long3a$condition=="Neutral"&data.long3a$itemKind=="Safe"]) # p<.005 -- main comparison of interest

# dangerous objects
t.test(data.long3a$response[data.long3a$condition=="Immoral"&data.long3a$itemKind=="Dangerous"],data.long3a$response[data.long3a$condition=="Neutral"&data.long3a$itemKind=="Dangerous"]) # p=.53

# immoral thoughts
t.test(data.long3a$response[data.long3a$condition=="Immoral"&data.long3a$itemKind=="Dangerous"],data.long3a$response[data.long3a$condition=="Immoral"&data.long3a$itemKind=="Safe"]) # p<.001

# assume equal variance
var.test(data.long3a$response[data.long3a$condition=="Neutral"&data.long3a$itemKind=="Dangerous"],data.long3a$response[data.long3a$condition=="Neutral"&data.long3a$itemKind=="Safe"]) # p=.449

t.test(data.long3a$response[data.long3a$condition=="Neutral"&data.long3a$itemKind=="Dangerous"],data.long3a$response[data.long3a$condition=="Neutral"&data.long3a$itemKind=="Safe"],var.equal =TRUE) # p<.005

summary(aov(response~condition*itemKind,data=data.long3a))

```

### 3b: Bizarre Thoughts

#### Participants

```{r participants bizarre, include=FALSE, echo=FALSE}
d3b<- read.csv("data/ToCS_study3b.csv",stringsAsFactors = F)
#no demographic info collected
```

#### Results
```{r exp1b 1, echo=FALSE, include=FALSE, warning=FALSE}

data3b <- d3b[c(9,19:31)]

colnames(data3b)<-c("Subject","PencilBizarre","PencilNeutral","GunBizarre",
                  "GunNeutral","ScrewdriverBizarre","ScrewdriverNeutral","KnifeBizarre",
                  "KnifeNeutral","LightsBizarre","LightsNeutral","WhipBizarre","WhipNeutral","explanation")

data.long3b<-data3b%>%gather(question,response,-c("Subject","explanation"),na.rm=TRUE) %>%
  filter(response!="") %>%
  mutate(response=as.numeric(response),
         question=factor(question),
         condition=factor(c("Bizarre","Neutral","Bizarre","Neutral","Bizarre","Neutral",
                            "Bizarre","Neutral","Bizarre","Neutral","Bizarre","Neutral")[question]),
         item=factor(c("gun","gun","knife","knife","lights","lights","pencil","pencil","screwdriver","screwdriver","whip","whip")[question]),
         itemKind=factor(c("Dangerous","Dangerous","Dangerous","Dangerous","Safe","Safe","Safe","Safe","Safe","Safe","Dangerous","Dangerous")[question])) 

data.sum3b <- data.long3b %>% group_by(condition,itemKind,item) %>%
  summarise(mean=mean(response, na.rm = TRUE),
            sd=sd(response, na.rm = TRUE),
            n=length(response),
            se=sd/sqrt(n))

```

```{r exp1b 4, echo=FALSE, include=FALSE, warning=FALSE}
var.test(data.long3b$response[data.long3b$condition=="Bizarre"],data.long3b$response[data.long3b$condition=="Neutral"])

t.test(data.long3b$response[data.long3b$condition=="Bizarre"],data.long3b$response[data.long3b$condition=="Neutral"],var.equal =TRUE) # p = .049

t.test(data.long3b$response[data.long3b$itemKind=="Dangerous"],data.long3b$response[data.long3b$itemKind=="Safe"],var.equal =TRUE) # p = .944

t.test(data.long3b$response[data.long3b$condition=="Bizarre"&data.long3b$itemKind=="Dangerous"],data.long3b$response[data.long3b$condition=="Neutral"&data.long3b$itemKind=="Dangerous"],var.equal =TRUE) # p=.01

t.test(data.long3b$response[data.long3b$condition=="Neutral"&data.long3b$itemKind=="Dangerous"],data.long3b$response[data.long3b$condition=="Neutral"&data.long3b$itemKind=="Safe"],var.equal =TRUE) # p=.0.2424

t.test(data.long3b$response[data.long3b$condition=="Bizarre"&data.long3b$itemKind=="Safe"],data.long3b$response[data.long3b$condition=="Neutral"&data.long3b$itemKind=="Safe"],var.equal =TRUE) # p=.7, showing that people don't just judge abnormal cognition harshly 

anova(lm(response~condition*
           question,data=data.long3b))

summary(aov(response~condition*itemKind,data=data.long3b))

```

### 3c: Replication

#### Participants
```{r replication, include=FALSE, echo=FALSE}
data3c<- read.csv("data/ToCS_study3c.csv",stringsAsFactors = F)

demo3c_cols <- c("ResponseId","age","gender","race")
d_demo3c <- data3c[,demo3c_cols]

length(d_demo3c$ResponseId) #N
table(d_demo3c$gender) #Gender
table(d_demo3c$race) #Race
mean(d_demo3c$age,na.rm=T) #Mean Age
sd(d_demo3c$age,na.rm=T) #SD Age

```

#### Results
```{r drep cleaning, include=F,  echo=FALSE}
data3cl <- data3c %>% select(ResponseId,Pencil.immoral_10:Whip.Bizarre_8) %>%
                  gather(condition,response,-c(ResponseId),na.rm = T) %>%
                  mutate(condition = factor(condition),
                         item = factor(rep(c("Gun","Knife","Lights","Pencil","Screwdriver",
                                             "Whip"),each=3)[condition]),
                         thought = factor(rep(c("Bizarre","Immoral","Neutral"),6)[condition]),
                         danger = factor(c("Dangerous","Dangerous","Safe","Safe","Safe","Dangerous")[item]))

```

```{r data3 plot prep, include=F, echo=F}

data3c.sum <- data3cl %>% group_by(thought,danger) %>% 
                  summarise(
                    mean = mean(response,na.rm=T),
                    sd = sd(response,na.rm=T),
                    n = length(response),
                    se = sd/sqrt(n)
                  )
```

```{r data3 analysis, include=F, echo=F}
summary(aov(lm(response~thought*danger,data=data3cl)))

var.test(data3cl$response[data3cl$danger=="Dangerous"],data3cl$response[data3cl$danger=="Safe"]) #effect of object danger (safe vs dangerous), p<.01

t.test(data3cl$response[data3cl$danger=="Dangerous"],data3cl$response[data3cl$danger=="Safe"],var.equal =TRUE) # p<.001

summary(aov(response~thought,data=data3cl))

print(data3cl %>%
  group_by(thought) %>%
  summarize(meanresponse3c = mean(response, na.rm = TRUE))
)

t.test(data3cl$response[data3cl$thought=="Immoral"&data3cl$danger=="Dangerous"],data3cl$response[data3cl$thought=="Immoral"&data3cl$danger=="Safe"],var.equal =TRUE) #p<.001

summary(aov(response ~ thought, data = data3cl, subset = (danger == "Safe")))

t.test(data3cl$response[data3cl$thought=="Immoral"&data3cl$danger=="Safe"],data3cl$response[data3cl$thought=="Neutral"&data3cl$danger=="Safe"],var.equal =TRUE)

t.test(data3cl$response[data3cl$thought=="Immoral"&data3cl$danger=="Safe"],data3cl$response[data3cl$thought=="Bizarre"&data3cl$danger=="Safe"],var.equal =TRUE)

t.test(data3cl$response[data3cl$thought=="Neutral"&data3cl$danger=="Safe"],data3cl$response[data3cl$thought=="Bizarre"&data3cl$danger=="Safe"],var.equal =TRUE)

t.test(data3cl$response[data3cl$thought=="Neutral"&data3cl$danger=="Safe"],data3cl$response[data3cl$thought=="Neutral"&data3cl$danger=="Dangerous"],var.equal =TRUE)

```

``` {r data3 plot, include=F}
data3c.sum$thought <- factor(data3c.sum$thought, levels = c("Immoral", "Bizarre", "Neutral"))

data3c.plot <- data3c.sum %>%
  mutate(mean = 100 - mean,  # Reverse code the mean values
         danger = fct_rev(danger)) %>%  # Reverse the order of the levels in the 'danger' factor
  ggplot(aes(x=thought, y=mean, fill=thought)) +
  geom_bar(stat="identity", position="dodge") +
  ylab("Mean immorality rating") +
  xlab("") +
  facet_grid(~danger) +
  geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.1, position=position_dodge(.9)) +
  theme_bw() +
  theme(
    plot.background = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    legend.title = element_blank(),
    legend.text = element_blank(),
    axis.text.y = element_text(size=rel(1.4)),
    axis.title.y = element_text(size=rel(1.3), vjust=.9),
    axis.title.x = element_blank(),
    axis.text.x = element_text(size=rel(1.5)),
    axis.ticks = element_blank(),
    strip.text = element_text(size=rel(1.5))  
  ) +
  scale_fill_manual(values=c("Bizarre"="#CCCCCC", "Immoral"="#999999", "Neutral"="#333333"))

ggsave("figures/fig4.jpeg", plot = data3c.plot, device = "jpeg", width = 8, height = 5.5, units = "in")

```

## Study 4: Inverse Values Moral Judgment Study

### Participants

```{r study 4 participants, include=F, echo=FALSE}
d4<- read.csv("data/ToCS_study4.csv",stringsAsFactors = F)

demo4_cols <- c("TurkID","Age","Gender","Race") # needed to use the first 5 digits of TurkIDs because no response ID available
d_demo4 <- d4[,demo4_cols]

length(d_demo4$TurkID) #N
table(d_demo4$Gender) #Gender
table(d_demo4$Race) #Race
mean(d_demo4$Age,na.rm=T) #Mean Age
sd(d_demo4$Age,na.rm=T) #SD Age

```

### Results

```{r study 4 data, echo=FALSE, include=FALSE, warning=FALSE}

data4 <- d4[,c(18:27)]

colnames(data4)<-c("TurkID", "NeoNazis-training", "NeoNazis-human rights","Race-training", "Race-human rights","Spy-training", "Spy-human rights","Homophobia-training", "Homophobia-human rights","Explanation")


data4.long<-data4%>%gather(question,response,-c("TurkID", "Explanation"),na.rm=TRUE) %>%
  filter(response!="") %>%
  mutate(response=factor(response),
         question=factor(question),
         condition=factor(c("Human Rights","Training","Human Rights","Training","Human Rights","Training","Human Rights","Training")[question]),
         situation=factor(c("Homophobia","Homophobia","NeoNazis","NeoNazis","Race","Race","Spy","Spy")[question])
  ) %>%
mutate(ResponseCoded=case_when
       (situation %in% c("Homophobia","NeoNazis") & response=="John" ~ "Easy",
         situation %in% c("Homophobia","NeoNazis") & response=="Sarah" ~ "Hard",
         situation %in% c("Race", "Spy") & response=="Sarah" ~ "Easy",
         situation %in% c("Race", "Spy") & response=="John" ~ "Hard"
), ResponseCoded=factor(ResponseCoded)
)


```


```{r study 4 graph 1, echo=FALSE, warning=FALSE}
data4.sum <- data4.long %>% group_by(condition,ResponseCoded) %>%
  summarise(count=n()) %>% mutate(percent_selected=(count*100)/sum(count))

# error bars for a percentage
data4.sum <- data4.sum %>%
  mutate(
    proportion = percent_selected / 100,  
    se = sqrt(proportion * (1 - proportion) / 398),  # Calculate standard error using the sample size (N=398)
    ymin = percent_selected - (se * 100),  
    ymax = percent_selected + (se * 100)
  )

data4.plot <- data4.sum %>% 
  ggplot(aes(y = percent_selected, x = condition, fill = condition)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_errorbar(aes(ymin = ymin, ymax = ymax), 
                position = position_dodge(width = 0.9),
                width = 0.25) +
  ylab("Percentage of Character Selected") +
  xlab("Condition") +
  facet_grid(~ResponseCoded) +
  theme_bw() +
  theme(
    plot.background = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    legend.title = element_blank(),
    legend.text = element_text(size = rel(2)),
    legend.justification = c(1, 1),
    axis.text.y = element_text(size = rel(1.4)),
    axis.title.y = element_text(size = rel(1.3), vjust = .9),
    axis.title.x = element_text(size = rel(1.3), vjust = .9),
    axis.text.x = element_text(size = rel(1.5)),
    axis.ticks = element_blank(),
    legend.position = "none",
    strip.text = element_text(size = rel(1.3))
  ) +
  scale_fill_manual(values = c("Human Rights" = "grey70", "Training" = "grey40")) +
  coord_cartesian(ylim = c(0, 100))

plot(data4.plot)

ggsave("figures/fig5.jpeg", plot = data4.plot, device = "jpeg", width = 8, height = 5.5, units = "in")

```

```{r study 2 lm, echo=FALSE, include=F, warning=FALSE}
lm2.0 <- glmer(ResponseCoded~condition+(condition|situation),data = data4.long, family = binomial)
lm2.1 <- glmer(ResponseCoded~(condition|situation),data = data4.long, family=binomial)
anova(lm2.0,lm2.1)
```
